{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DAY 4 认识pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据的读取和查看"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取数据\n",
    "import pandas as pd\n",
    "data = pd.read_csv(r'data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data) # 类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "方法 ：人.跳舞()\n",
    "属性：人.年龄"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7495</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7496</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7497</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7498</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7499</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7500 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Id  Home Ownership  Annual Income  Years in current job  Tax Liens  \\\n",
       "0     False           False          False                  True      False   \n",
       "1     False           False          False                 False      False   \n",
       "2     False           False          False                 False      False   \n",
       "3     False           False          False                 False      False   \n",
       "4     False           False          False                 False      False   \n",
       "...     ...             ...            ...                   ...        ...   \n",
       "7495  False           False          False                 False      False   \n",
       "7496  False           False          False                 False      False   \n",
       "7497  False           False          False                 False      False   \n",
       "7498  False           False           True                  True      False   \n",
       "7499  False           False           True                 False      False   \n",
       "\n",
       "      Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                       False                    False                False   \n",
       "1                       False                    False                False   \n",
       "2                       False                    False                False   \n",
       "3                       False                    False                False   \n",
       "4                       False                    False                False   \n",
       "...                       ...                      ...                  ...   \n",
       "7495                    False                    False                False   \n",
       "7496                    False                    False                False   \n",
       "7497                    False                    False                False   \n",
       "7498                    False                    False                False   \n",
       "7499                    False                    False                False   \n",
       "\n",
       "      Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                         False                          True         False   \n",
       "1                         False                          True         False   \n",
       "2                         False                          True         False   \n",
       "3                         False                          True         False   \n",
       "4                         False                          True         False   \n",
       "...                         ...                           ...           ...   \n",
       "7495                      False                          True         False   \n",
       "7496                      False                         False         False   \n",
       "7497                      False                          True         False   \n",
       "7498                      False                          True         False   \n",
       "7499                      False                          True         False   \n",
       "\n",
       "      Purpose   Term  Current Loan Amount  Current Credit Balance  \\\n",
       "0       False  False                False                   False   \n",
       "1       False  False                False                   False   \n",
       "2       False  False                False                   False   \n",
       "3       False  False                False                   False   \n",
       "4       False  False                False                   False   \n",
       "...       ...    ...                  ...                     ...   \n",
       "7495    False  False                False                   False   \n",
       "7496    False  False                False                   False   \n",
       "7497    False  False                False                   False   \n",
       "7498    False  False                False                   False   \n",
       "7499    False  False                False                   False   \n",
       "\n",
       "      Monthly Debt  Credit Score  Credit Default  \n",
       "0            False         False           False  \n",
       "1            False         False           False  \n",
       "2            False         False           False  \n",
       "3            False         False           False  \n",
       "4            False         False           False  \n",
       "...            ...           ...             ...  \n",
       "7495         False         False           False  \n",
       "7496         False         False           False  \n",
       "7497         False         False           False  \n",
       "7498         False          True           False  \n",
       "7499         False          True           False  \n",
       "\n",
       "[7500 rows x 18 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull()            # 布尔矩阵显示缺失值，这个方法返回一个布尔矩阵，也是dataframe对象，其中True表示对应位置的值是缺失值，False表示对应位置的值不是缺失值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>482087.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>26.3</td>\n",
       "      <td>685960.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999.0</td>\n",
       "      <td>47386.0</td>\n",
       "      <td>7914.0</td>\n",
       "      <td>749.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>1025487.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>15.3</td>\n",
       "      <td>1181730.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>264968.0</td>\n",
       "      <td>394972.0</td>\n",
       "      <td>18373.0</td>\n",
       "      <td>737.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>751412.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1182434.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999.0</td>\n",
       "      <td>308389.0</td>\n",
       "      <td>13651.0</td>\n",
       "      <td>742.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>805068.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>22.5</td>\n",
       "      <td>147400.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>121396.0</td>\n",
       "      <td>95855.0</td>\n",
       "      <td>11338.0</td>\n",
       "      <td>694.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Rent</td>\n",
       "      <td>776264.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.6</td>\n",
       "      <td>385836.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>125840.0</td>\n",
       "      <td>93309.0</td>\n",
       "      <td>7180.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>Rent</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>14.6</td>\n",
       "      <td>366784.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>other</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>337304.0</td>\n",
       "      <td>165680.0</td>\n",
       "      <td>18692.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>1511108.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>20.3</td>\n",
       "      <td>388124.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>home improvements</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999.0</td>\n",
       "      <td>51623.0</td>\n",
       "      <td>2317.0</td>\n",
       "      <td>745.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>Rent</td>\n",
       "      <td>1040060.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>330374.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>other</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>250888.0</td>\n",
       "      <td>89015.0</td>\n",
       "      <td>19761.0</td>\n",
       "      <td>705.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>home improvements</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>129734.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1 year</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>24.6</td>\n",
       "      <td>511302.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>572880.0</td>\n",
       "      <td>205333.0</td>\n",
       "      <td>17613.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id Home Ownership  Annual Income Years in current job  Tax Liens  \\\n",
       "0   0       Own Home       482087.0                  NaN        0.0   \n",
       "1   1       Own Home      1025487.0            10+ years        0.0   \n",
       "2   2  Home Mortgage       751412.0              8 years        0.0   \n",
       "3   3       Own Home       805068.0              6 years        0.0   \n",
       "4   4           Rent       776264.0              8 years        0.0   \n",
       "5   5           Rent            NaN              7 years        0.0   \n",
       "6   6  Home Mortgage      1511108.0            10+ years        0.0   \n",
       "7   7           Rent      1040060.0            10+ years        0.0   \n",
       "8   8  Home Mortgage            NaN              5 years        0.0   \n",
       "9   9  Home Mortgage            NaN               1 year        0.0   \n",
       "\n",
       "   Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                     11.0                     26.3             685960.0   \n",
       "1                     15.0                     15.3            1181730.0   \n",
       "2                     11.0                     35.0            1182434.0   \n",
       "3                      8.0                     22.5             147400.0   \n",
       "4                     13.0                     13.6             385836.0   \n",
       "5                     12.0                     14.6             366784.0   \n",
       "6                      9.0                     20.3             388124.0   \n",
       "7                     13.0                     12.0             330374.0   \n",
       "8                     17.0                     15.7                  0.0   \n",
       "9                     10.0                     24.6             511302.0   \n",
       "\n",
       "   Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                        1.0                           NaN           1.0   \n",
       "1                        0.0                           NaN           0.0   \n",
       "2                        0.0                           NaN           0.0   \n",
       "3                        1.0                           NaN           1.0   \n",
       "4                        1.0                           NaN           0.0   \n",
       "5                        0.0                           NaN           0.0   \n",
       "6                        0.0                          73.0           0.0   \n",
       "7                        0.0                          18.0           0.0   \n",
       "8                        1.0                           NaN           1.0   \n",
       "9                        0.0                           6.0           0.0   \n",
       "\n",
       "              Purpose        Term  Current Loan Amount  \\\n",
       "0  debt consolidation  Short Term           99999999.0   \n",
       "1  debt consolidation   Long Term             264968.0   \n",
       "2  debt consolidation  Short Term           99999999.0   \n",
       "3  debt consolidation  Short Term             121396.0   \n",
       "4  debt consolidation  Short Term             125840.0   \n",
       "5               other   Long Term             337304.0   \n",
       "6   home improvements  Short Term           99999999.0   \n",
       "7               other  Short Term             250888.0   \n",
       "8   home improvements  Short Term             129734.0   \n",
       "9  debt consolidation   Long Term             572880.0   \n",
       "\n",
       "   Current Credit Balance  Monthly Debt  Credit Score  Credit Default  \n",
       "0                 47386.0        7914.0         749.0               0  \n",
       "1                394972.0       18373.0         737.0               1  \n",
       "2                308389.0       13651.0         742.0               0  \n",
       "3                 95855.0       11338.0         694.0               0  \n",
       "4                 93309.0        7180.0         719.0               0  \n",
       "5                165680.0       18692.0           NaN               1  \n",
       "6                 51623.0        2317.0         745.0               0  \n",
       "7                 89015.0       19761.0         705.0               1  \n",
       "8                    19.0          17.0           NaN               0  \n",
       "9                205333.0       17613.0           NaN               1  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>482087.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>26.3</td>\n",
       "      <td>685960</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999</td>\n",
       "      <td>47386</td>\n",
       "      <td>7914</td>\n",
       "      <td>749.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>1025487.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>15.3</td>\n",
       "      <td>1181730</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>264968</td>\n",
       "      <td>394972</td>\n",
       "      <td>18373</td>\n",
       "      <td>737.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>751412.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1182434</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999</td>\n",
       "      <td>308389</td>\n",
       "      <td>13651</td>\n",
       "      <td>742.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>805068.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>22.5</td>\n",
       "      <td>147400</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>121396</td>\n",
       "      <td>95855</td>\n",
       "      <td>11338</td>\n",
       "      <td>694.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Rent</td>\n",
       "      <td>776264.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>13.6</td>\n",
       "      <td>385836</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>125840</td>\n",
       "      <td>93309</td>\n",
       "      <td>7180</td>\n",
       "      <td>719.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7495</th>\n",
       "      <td>7495</td>\n",
       "      <td>Rent</td>\n",
       "      <td>402192.0</td>\n",
       "      <td>&lt; 1 year</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>8.5</td>\n",
       "      <td>107866</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>other</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>129360</td>\n",
       "      <td>73492</td>\n",
       "      <td>1900</td>\n",
       "      <td>697.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7496</th>\n",
       "      <td>7496</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>1533984.0</td>\n",
       "      <td>1 year</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>26.5</td>\n",
       "      <td>686312</td>\n",
       "      <td>0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>444048</td>\n",
       "      <td>456399</td>\n",
       "      <td>12783</td>\n",
       "      <td>7410.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7497</th>\n",
       "      <td>7497</td>\n",
       "      <td>Rent</td>\n",
       "      <td>1878910.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>32.1</td>\n",
       "      <td>1778920</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>buy a car</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999</td>\n",
       "      <td>477812</td>\n",
       "      <td>12479</td>\n",
       "      <td>748.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7498</th>\n",
       "      <td>7498</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>26.5</td>\n",
       "      <td>1141250</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>615274</td>\n",
       "      <td>476064</td>\n",
       "      <td>37118</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7499</th>\n",
       "      <td>7499</td>\n",
       "      <td>Rent</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4 years</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>9.4</td>\n",
       "      <td>480832</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>26928</td>\n",
       "      <td>288192</td>\n",
       "      <td>9061</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7500 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Id Home Ownership  Annual Income Years in current job  Tax Liens  \\\n",
       "0        0       Own Home       482087.0                  NaN          0   \n",
       "1        1       Own Home      1025487.0            10+ years          0   \n",
       "2        2  Home Mortgage       751412.0              8 years          0   \n",
       "3        3       Own Home       805068.0              6 years          0   \n",
       "4        4           Rent       776264.0              8 years          0   \n",
       "...    ...            ...            ...                  ...        ...   \n",
       "7495  7495           Rent       402192.0             < 1 year          0   \n",
       "7496  7496  Home Mortgage      1533984.0               1 year          0   \n",
       "7497  7497           Rent      1878910.0              6 years          0   \n",
       "7498  7498  Home Mortgage            NaN                  NaN          0   \n",
       "7499  7499           Rent            NaN              4 years          0   \n",
       "\n",
       "      Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                          11                     26.3               685960   \n",
       "1                          15                     15.3              1181730   \n",
       "2                          11                     35.0              1182434   \n",
       "3                           8                     22.5               147400   \n",
       "4                          13                     13.6               385836   \n",
       "...                       ...                      ...                  ...   \n",
       "7495                        3                      8.5               107866   \n",
       "7496                       10                     26.5               686312   \n",
       "7497                       12                     32.1              1778920   \n",
       "7498                       21                     26.5              1141250   \n",
       "7499                        8                      9.4               480832   \n",
       "\n",
       "      Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                             1                           NaN           1.0   \n",
       "1                             0                           NaN           0.0   \n",
       "2                             0                           NaN           0.0   \n",
       "3                             1                           NaN           1.0   \n",
       "4                             1                           NaN           0.0   \n",
       "...                         ...                           ...           ...   \n",
       "7495                          0                           NaN           0.0   \n",
       "7496                          0                          43.0           0.0   \n",
       "7497                          0                           NaN           0.0   \n",
       "7498                          0                           NaN           0.0   \n",
       "7499                          0                           NaN           0.0   \n",
       "\n",
       "                 Purpose        Term  Current Loan Amount  \\\n",
       "0     debt consolidation  Short Term             99999999   \n",
       "1     debt consolidation   Long Term               264968   \n",
       "2     debt consolidation  Short Term             99999999   \n",
       "3     debt consolidation  Short Term               121396   \n",
       "4     debt consolidation  Short Term               125840   \n",
       "...                  ...         ...                  ...   \n",
       "7495               other  Short Term               129360   \n",
       "7496  debt consolidation   Long Term               444048   \n",
       "7497           buy a car  Short Term             99999999   \n",
       "7498  debt consolidation  Short Term               615274   \n",
       "7499  debt consolidation  Short Term                26928   \n",
       "\n",
       "      Current Credit Balance  Monthly Debt  Credit Score  Credit Default  \n",
       "0                      47386          7914         749.0               0  \n",
       "1                     394972         18373         737.0               1  \n",
       "2                     308389         13651         742.0               0  \n",
       "3                      95855         11338         694.0               0  \n",
       "4                      93309          7180         719.0               0  \n",
       "...                      ...           ...           ...             ...  \n",
       "7495                   73492          1900         697.0               0  \n",
       "7496                  456399         12783        7410.0               1  \n",
       "7497                  477812         12479         748.0               0  \n",
       "7498                  476064         37118           NaN               0  \n",
       "7499                  288192          9061           NaN               0  \n",
       "\n",
       "[7500 rows x 18 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pip install openpyxl\n",
    "# pandas读取excel需要安装openpyxl库，去anaconda prompt中安装\n",
    "data2 =pd.read_excel('data.xlsx')\n",
    "data2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>482087.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>26.3</td>\n",
       "      <td>685960.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999.0</td>\n",
       "      <td>47386.0</td>\n",
       "      <td>7914.0</td>\n",
       "      <td>749.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>1025487.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>15.3</td>\n",
       "      <td>1181730.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>264968.0</td>\n",
       "      <td>394972.0</td>\n",
       "      <td>18373.0</td>\n",
       "      <td>737.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>751412.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1182434.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999.0</td>\n",
       "      <td>308389.0</td>\n",
       "      <td>13651.0</td>\n",
       "      <td>742.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Own Home</td>\n",
       "      <td>805068.0</td>\n",
       "      <td>6 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>22.5</td>\n",
       "      <td>147400.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>121396.0</td>\n",
       "      <td>95855.0</td>\n",
       "      <td>11338.0</td>\n",
       "      <td>694.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Rent</td>\n",
       "      <td>776264.0</td>\n",
       "      <td>8 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>13.6</td>\n",
       "      <td>385836.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>125840.0</td>\n",
       "      <td>93309.0</td>\n",
       "      <td>7180.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>Rent</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>14.6</td>\n",
       "      <td>366784.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>other</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>337304.0</td>\n",
       "      <td>165680.0</td>\n",
       "      <td>18692.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>1511108.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>20.3</td>\n",
       "      <td>388124.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>home improvements</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>99999999.0</td>\n",
       "      <td>51623.0</td>\n",
       "      <td>2317.0</td>\n",
       "      <td>745.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>Rent</td>\n",
       "      <td>1040060.0</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>330374.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>other</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>250888.0</td>\n",
       "      <td>89015.0</td>\n",
       "      <td>19761.0</td>\n",
       "      <td>705.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5 years</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>15.7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>home improvements</td>\n",
       "      <td>Short Term</td>\n",
       "      <td>129734.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>Home Mortgage</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1 year</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>24.6</td>\n",
       "      <td>511302.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>debt consolidation</td>\n",
       "      <td>Long Term</td>\n",
       "      <td>572880.0</td>\n",
       "      <td>205333.0</td>\n",
       "      <td>17613.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id Home Ownership  Annual Income Years in current job  Tax Liens  \\\n",
       "0   0       Own Home       482087.0                  NaN        0.0   \n",
       "1   1       Own Home      1025487.0            10+ years        0.0   \n",
       "2   2  Home Mortgage       751412.0              8 years        0.0   \n",
       "3   3       Own Home       805068.0              6 years        0.0   \n",
       "4   4           Rent       776264.0              8 years        0.0   \n",
       "5   5           Rent            NaN              7 years        0.0   \n",
       "6   6  Home Mortgage      1511108.0            10+ years        0.0   \n",
       "7   7           Rent      1040060.0            10+ years        0.0   \n",
       "8   8  Home Mortgage            NaN              5 years        0.0   \n",
       "9   9  Home Mortgage            NaN               1 year        0.0   \n",
       "\n",
       "   Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                     11.0                     26.3             685960.0   \n",
       "1                     15.0                     15.3            1181730.0   \n",
       "2                     11.0                     35.0            1182434.0   \n",
       "3                      8.0                     22.5             147400.0   \n",
       "4                     13.0                     13.6             385836.0   \n",
       "5                     12.0                     14.6             366784.0   \n",
       "6                      9.0                     20.3             388124.0   \n",
       "7                     13.0                     12.0             330374.0   \n",
       "8                     17.0                     15.7                  0.0   \n",
       "9                     10.0                     24.6             511302.0   \n",
       "\n",
       "   Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                        1.0                           NaN           1.0   \n",
       "1                        0.0                           NaN           0.0   \n",
       "2                        0.0                           NaN           0.0   \n",
       "3                        1.0                           NaN           1.0   \n",
       "4                        1.0                           NaN           0.0   \n",
       "5                        0.0                           NaN           0.0   \n",
       "6                        0.0                          73.0           0.0   \n",
       "7                        0.0                          18.0           0.0   \n",
       "8                        1.0                           NaN           1.0   \n",
       "9                        0.0                           6.0           0.0   \n",
       "\n",
       "              Purpose        Term  Current Loan Amount  \\\n",
       "0  debt consolidation  Short Term           99999999.0   \n",
       "1  debt consolidation   Long Term             264968.0   \n",
       "2  debt consolidation  Short Term           99999999.0   \n",
       "3  debt consolidation  Short Term             121396.0   \n",
       "4  debt consolidation  Short Term             125840.0   \n",
       "5               other   Long Term             337304.0   \n",
       "6   home improvements  Short Term           99999999.0   \n",
       "7               other  Short Term             250888.0   \n",
       "8   home improvements  Short Term             129734.0   \n",
       "9  debt consolidation   Long Term             572880.0   \n",
       "\n",
       "   Current Credit Balance  Monthly Debt  Credit Score  Credit Default  \n",
       "0                 47386.0        7914.0         749.0               0  \n",
       "1                394972.0       18373.0         737.0               1  \n",
       "2                308389.0       13651.0         742.0               0  \n",
       "3                 95855.0       11338.0         694.0               0  \n",
       "4                 93309.0        7180.0         719.0               0  \n",
       "5                165680.0       18692.0           NaN               1  \n",
       "6                 51623.0        2317.0         745.0               0  \n",
       "7                 89015.0       19761.0         705.0               1  \n",
       "8                    19.0          17.0           NaN               0  \n",
       "9                205333.0       17613.0           NaN               1  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据信息的查看"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "此时data是一个dataframe类型的对象，可以理解为dataframe类的实例。实例就具有类的属性和方法。\n",
    "1. 属性的调用格式为：实例名.属性名。\n",
    "2. 方法的调用格式为：实例名.方法名()。\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 7500 entries, 0 to 7499\n",
      "Data columns (total 18 columns):\n",
      " #   Column                        Non-Null Count  Dtype  \n",
      "---  ------                        --------------  -----  \n",
      " 0   Id                            7500 non-null   int64  \n",
      " 1   Home Ownership                7500 non-null   object \n",
      " 2   Annual Income                 5943 non-null   float64\n",
      " 3   Years in current job          7129 non-null   object \n",
      " 4   Tax Liens                     7500 non-null   float64\n",
      " 5   Number of Open Accounts       7500 non-null   float64\n",
      " 6   Years of Credit History       7500 non-null   float64\n",
      " 7   Maximum Open Credit           7500 non-null   float64\n",
      " 8   Number of Credit Problems     7500 non-null   float64\n",
      " 9   Months since last delinquent  3419 non-null   float64\n",
      " 10  Bankruptcies                  7486 non-null   float64\n",
      " 11  Purpose                       7500 non-null   object \n",
      " 12  Term                          7500 non-null   object \n",
      " 13  Current Loan Amount           7500 non-null   float64\n",
      " 14  Current Credit Balance        7500 non-null   float64\n",
      " 15  Monthly Debt                  7500 non-null   float64\n",
      " 16  Credit Score                  5943 non-null   float64\n",
      " 17  Credit Default                7500 non-null   int64  \n",
      "dtypes: float64(12), int64(2), object(4)\n",
      "memory usage: 1.0+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()       # 列名、非空值、数据类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(7500, 18)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape        # (行数, 列数) data的属性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Id', 'Home Ownership', 'Annual Income', 'Years in current job',\n",
       "       'Tax Liens', 'Number of Open Accounts', 'Years of Credit History',\n",
       "       'Maximum Open Credit', 'Number of Credit Problems',\n",
       "       'Months since last delinquent', 'Bankruptcies', 'Purpose', 'Term',\n",
       "       'Current Loan Amount', 'Current Credit Balance', 'Monthly Debt',\n",
       "       'Credit Score', 'Credit Default'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.columns      # 所有列名 data的属性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7500.000000</td>\n",
       "      <td>5.943000e+03</td>\n",
       "      <td>7500.000000</td>\n",
       "      <td>7500.000000</td>\n",
       "      <td>7500.000000</td>\n",
       "      <td>7.500000e+03</td>\n",
       "      <td>7500.000000</td>\n",
       "      <td>3419.000000</td>\n",
       "      <td>7486.000000</td>\n",
       "      <td>7.500000e+03</td>\n",
       "      <td>7.500000e+03</td>\n",
       "      <td>7500.000000</td>\n",
       "      <td>5943.000000</td>\n",
       "      <td>7500.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3749.500000</td>\n",
       "      <td>1.366392e+06</td>\n",
       "      <td>0.030133</td>\n",
       "      <td>11.130933</td>\n",
       "      <td>18.317467</td>\n",
       "      <td>9.451537e+05</td>\n",
       "      <td>0.170000</td>\n",
       "      <td>34.692600</td>\n",
       "      <td>0.117152</td>\n",
       "      <td>1.187318e+07</td>\n",
       "      <td>2.898332e+05</td>\n",
       "      <td>18314.454133</td>\n",
       "      <td>1151.087498</td>\n",
       "      <td>0.281733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2165.207842</td>\n",
       "      <td>8.453392e+05</td>\n",
       "      <td>0.271604</td>\n",
       "      <td>4.908924</td>\n",
       "      <td>7.041946</td>\n",
       "      <td>1.602622e+07</td>\n",
       "      <td>0.498598</td>\n",
       "      <td>21.688806</td>\n",
       "      <td>0.347192</td>\n",
       "      <td>3.192612e+07</td>\n",
       "      <td>3.178714e+05</td>\n",
       "      <td>11926.764673</td>\n",
       "      <td>1604.451418</td>\n",
       "      <td>0.449874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.645970e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.124200e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>585.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1874.750000</td>\n",
       "      <td>8.443410e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>13.500000</td>\n",
       "      <td>2.792295e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.801690e+05</td>\n",
       "      <td>1.142565e+05</td>\n",
       "      <td>10067.500000</td>\n",
       "      <td>711.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3749.500000</td>\n",
       "      <td>1.168386e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>4.781590e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.095730e+05</td>\n",
       "      <td>2.093230e+05</td>\n",
       "      <td>16076.500000</td>\n",
       "      <td>731.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>5624.250000</td>\n",
       "      <td>1.640137e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>21.800000</td>\n",
       "      <td>7.935015e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>50.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5.198820e+05</td>\n",
       "      <td>3.604062e+05</td>\n",
       "      <td>23818.000000</td>\n",
       "      <td>743.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>7499.000000</td>\n",
       "      <td>1.014934e+07</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>57.700000</td>\n",
       "      <td>1.304726e+09</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>118.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.000000e+08</td>\n",
       "      <td>6.506797e+06</td>\n",
       "      <td>136679.000000</td>\n",
       "      <td>7510.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                Id  Annual Income    Tax Liens  Number of Open Accounts  \\\n",
       "count  7500.000000   5.943000e+03  7500.000000              7500.000000   \n",
       "mean   3749.500000   1.366392e+06     0.030133                11.130933   \n",
       "std    2165.207842   8.453392e+05     0.271604                 4.908924   \n",
       "min       0.000000   1.645970e+05     0.000000                 2.000000   \n",
       "25%    1874.750000   8.443410e+05     0.000000                 8.000000   \n",
       "50%    3749.500000   1.168386e+06     0.000000                10.000000   \n",
       "75%    5624.250000   1.640137e+06     0.000000                14.000000   \n",
       "max    7499.000000   1.014934e+07     7.000000                43.000000   \n",
       "\n",
       "       Years of Credit History  Maximum Open Credit  \\\n",
       "count              7500.000000         7.500000e+03   \n",
       "mean                 18.317467         9.451537e+05   \n",
       "std                   7.041946         1.602622e+07   \n",
       "min                   4.000000         0.000000e+00   \n",
       "25%                  13.500000         2.792295e+05   \n",
       "50%                  17.000000         4.781590e+05   \n",
       "75%                  21.800000         7.935015e+05   \n",
       "max                  57.700000         1.304726e+09   \n",
       "\n",
       "       Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "count                7500.000000                   3419.000000   7486.000000   \n",
       "mean                    0.170000                     34.692600      0.117152   \n",
       "std                     0.498598                     21.688806      0.347192   \n",
       "min                     0.000000                      0.000000      0.000000   \n",
       "25%                     0.000000                     16.000000      0.000000   \n",
       "50%                     0.000000                     32.000000      0.000000   \n",
       "75%                     0.000000                     50.000000      0.000000   \n",
       "max                     7.000000                    118.000000      4.000000   \n",
       "\n",
       "       Current Loan Amount  Current Credit Balance   Monthly Debt  \\\n",
       "count         7.500000e+03            7.500000e+03    7500.000000   \n",
       "mean          1.187318e+07            2.898332e+05   18314.454133   \n",
       "std           3.192612e+07            3.178714e+05   11926.764673   \n",
       "min           1.124200e+04            0.000000e+00       0.000000   \n",
       "25%           1.801690e+05            1.142565e+05   10067.500000   \n",
       "50%           3.095730e+05            2.093230e+05   16076.500000   \n",
       "75%           5.198820e+05            3.604062e+05   23818.000000   \n",
       "max           1.000000e+08            6.506797e+06  136679.000000   \n",
       "\n",
       "       Credit Score  Credit Default  \n",
       "count   5943.000000     7500.000000  \n",
       "mean    1151.087498        0.281733  \n",
       "std     1604.451418        0.449874  \n",
       "min      585.000000        0.000000  \n",
       "25%      711.000000        0.000000  \n",
       "50%      731.000000        0.000000  \n",
       "75%      743.000000        1.000000  \n",
       "max     7510.000000        1.000000  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()   # 数值列的基本统计量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Id',\n",
       " 'Home Ownership',\n",
       " 'Annual Income',\n",
       " 'Years in current job',\n",
       " 'Tax Liens',\n",
       " 'Number of Open Accounts',\n",
       " 'Years of Credit History',\n",
       " 'Maximum Open Credit',\n",
       " 'Number of Credit Problems',\n",
       " 'Months since last delinquent',\n",
       " 'Bankruptcies',\n",
       " 'Purpose',\n",
       " 'Term',\n",
       " 'Current Loan Amount',\n",
       " 'Current Credit Balance',\n",
       " 'Monthly Debt',\n",
       " 'Credit Score',\n",
       " 'Credit Default']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(data.columns)  #把列名用列表的样子展示出来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 7500 entries, 0 to 7499\n",
      "Data columns (total 18 columns):\n",
      " #   Column                        Non-Null Count  Dtype  \n",
      "---  ------                        --------------  -----  \n",
      " 0   Id                            7500 non-null   int64  \n",
      " 1   Home Ownership                7500 non-null   object \n",
      " 2   Annual Income                 5943 non-null   float64\n",
      " 3   Years in current job          7129 non-null   object \n",
      " 4   Tax Liens                     7500 non-null   float64\n",
      " 5   Number of Open Accounts       7500 non-null   float64\n",
      " 6   Years of Credit History       7500 non-null   float64\n",
      " 7   Maximum Open Credit           7500 non-null   float64\n",
      " 8   Number of Credit Problems     7500 non-null   float64\n",
      " 9   Months since last delinquent  3419 non-null   float64\n",
      " 10  Bankruptcies                  7486 non-null   float64\n",
      " 11  Purpose                       7500 non-null   object \n",
      " 12  Term                          7500 non-null   object \n",
      " 13  Current Loan Amount           7500 non-null   float64\n",
      " 14  Current Credit Balance        7500 non-null   float64\n",
      " 15  Monthly Debt                  7500 non-null   float64\n",
      " 16  Credit Score                  5943 non-null   float64\n",
      " 17  Credit Default                7500 non-null   int64  \n",
      "dtypes: float64(12), int64(2), object(4)\n",
      "memory usage: 1.0+ MB\n"
     ]
    }
   ],
   "source": [
    "# dtype是data type的缩写，用于描述数据类型。后续会频繁借助这个方法来查看某一列数据的属性\n",
    "data.dtypes       # 各列数据类型\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[\"Annual Income\"].dtype # 查看某一列的数据类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7495</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7496</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7497</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7498</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7499</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7500 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Id  Home Ownership  Annual Income  Years in current job  Tax Liens  \\\n",
       "0     False           False          False                  True      False   \n",
       "1     False           False          False                 False      False   \n",
       "2     False           False          False                 False      False   \n",
       "3     False           False          False                 False      False   \n",
       "4     False           False          False                 False      False   \n",
       "...     ...             ...            ...                   ...        ...   \n",
       "7495  False           False          False                 False      False   \n",
       "7496  False           False          False                 False      False   \n",
       "7497  False           False          False                 False      False   \n",
       "7498  False           False           True                  True      False   \n",
       "7499  False           False           True                 False      False   \n",
       "\n",
       "      Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                       False                    False                False   \n",
       "1                       False                    False                False   \n",
       "2                       False                    False                False   \n",
       "3                       False                    False                False   \n",
       "4                       False                    False                False   \n",
       "...                       ...                      ...                  ...   \n",
       "7495                    False                    False                False   \n",
       "7496                    False                    False                False   \n",
       "7497                    False                    False                False   \n",
       "7498                    False                    False                False   \n",
       "7499                    False                    False                False   \n",
       "\n",
       "      Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                         False                          True         False   \n",
       "1                         False                          True         False   \n",
       "2                         False                          True         False   \n",
       "3                         False                          True         False   \n",
       "4                         False                          True         False   \n",
       "...                         ...                           ...           ...   \n",
       "7495                      False                          True         False   \n",
       "7496                      False                         False         False   \n",
       "7497                      False                          True         False   \n",
       "7498                      False                          True         False   \n",
       "7499                      False                          True         False   \n",
       "\n",
       "      Purpose   Term  Current Loan Amount  Current Credit Balance  \\\n",
       "0       False  False                False                   False   \n",
       "1       False  False                False                   False   \n",
       "2       False  False                False                   False   \n",
       "3       False  False                False                   False   \n",
       "4       False  False                False                   False   \n",
       "...       ...    ...                  ...                     ...   \n",
       "7495    False  False                False                   False   \n",
       "7496    False  False                False                   False   \n",
       "7497    False  False                False                   False   \n",
       "7498    False  False                False                   False   \n",
       "7499    False  False                False                   False   \n",
       "\n",
       "      Monthly Debt  Credit Score  Credit Default  \n",
       "0            False         False           False  \n",
       "1            False         False           False  \n",
       "2            False         False           False  \n",
       "3            False         False           False  \n",
       "4            False         False           False  \n",
       "...            ...           ...             ...  \n",
       "7495         False         False           False  \n",
       "7496         False         False           False  \n",
       "7497         False         False           False  \n",
       "7498         False          True           False  \n",
       "7499         False          True           False  \n",
       "\n",
       "[7500 rows x 18 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull()            # 布尔矩阵显示缺失值，这个方法返回一个布尔矩阵，也是dataframe对象，其中True表示对应位置的值是缺失值，False表示对应位置的值不是缺失值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data.isnull())         # 布尔矩阵显示缺失值，这个方法返回一个布尔矩阵，其中True表示对应位置的值是缺失值，False表示对应位置的值不是缺失值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7495</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7496</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7497</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7498</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7499</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7500 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Id  Home Ownership  Annual Income  Years in current job  Tax Liens  \\\n",
       "0     False           False          False                  True      False   \n",
       "1     False           False          False                 False      False   \n",
       "2     False           False          False                 False      False   \n",
       "3     False           False          False                 False      False   \n",
       "4     False           False          False                 False      False   \n",
       "...     ...             ...            ...                   ...        ...   \n",
       "7495  False           False          False                 False      False   \n",
       "7496  False           False          False                 False      False   \n",
       "7497  False           False          False                 False      False   \n",
       "7498  False           False           True                  True      False   \n",
       "7499  False           False           True                 False      False   \n",
       "\n",
       "      Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                       False                    False                False   \n",
       "1                       False                    False                False   \n",
       "2                       False                    False                False   \n",
       "3                       False                    False                False   \n",
       "4                       False                    False                False   \n",
       "...                       ...                      ...                  ...   \n",
       "7495                    False                    False                False   \n",
       "7496                    False                    False                False   \n",
       "7497                    False                    False                False   \n",
       "7498                    False                    False                False   \n",
       "7499                    False                    False                False   \n",
       "\n",
       "      Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                         False                          True         False   \n",
       "1                         False                          True         False   \n",
       "2                         False                          True         False   \n",
       "3                         False                          True         False   \n",
       "4                         False                          True         False   \n",
       "...                         ...                           ...           ...   \n",
       "7495                      False                          True         False   \n",
       "7496                      False                         False         False   \n",
       "7497                      False                          True         False   \n",
       "7498                      False                          True         False   \n",
       "7499                      False                          True         False   \n",
       "\n",
       "      Purpose   Term  Current Loan Amount  Current Credit Balance  \\\n",
       "0       False  False                False                   False   \n",
       "1       False  False                False                   False   \n",
       "2       False  False                False                   False   \n",
       "3       False  False                False                   False   \n",
       "4       False  False                False                   False   \n",
       "...       ...    ...                  ...                     ...   \n",
       "7495    False  False                False                   False   \n",
       "7496    False  False                False                   False   \n",
       "7497    False  False                False                   False   \n",
       "7498    False  False                False                   False   \n",
       "7499    False  False                False                   False   \n",
       "\n",
       "      Monthly Debt  Credit Score  Credit Default  \n",
       "0            False         False           False  \n",
       "1            False         False           False  \n",
       "2            False         False           False  \n",
       "3            False         False           False  \n",
       "4            False         False           False  \n",
       "...            ...           ...             ...  \n",
       "7495         False         False           False  \n",
       "7496         False         False           False  \n",
       "7497         False         False           False  \n",
       "7498         False          True           False  \n",
       "7499         False          True           False  \n",
       "\n",
       "[7500 rows x 18 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull()            # 布尔矩阵显示缺失值，这个方法返回一个布尔矩阵，其中True表示对应位置的值是缺失值，False表示对应位置的值不是缺失值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Home Ownership</th>\n",
       "      <th>Annual Income</th>\n",
       "      <th>Years in current job</th>\n",
       "      <th>Tax Liens</th>\n",
       "      <th>Number of Open Accounts</th>\n",
       "      <th>Years of Credit History</th>\n",
       "      <th>Maximum Open Credit</th>\n",
       "      <th>Number of Credit Problems</th>\n",
       "      <th>Months since last delinquent</th>\n",
       "      <th>Bankruptcies</th>\n",
       "      <th>Purpose</th>\n",
       "      <th>Term</th>\n",
       "      <th>Current Loan Amount</th>\n",
       "      <th>Current Credit Balance</th>\n",
       "      <th>Monthly Debt</th>\n",
       "      <th>Credit Score</th>\n",
       "      <th>Credit Default</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7495</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7496</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7497</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7498</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7499</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7500 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Id  Home Ownership  Annual Income  Years in current job  Tax Liens  \\\n",
       "0     False           False          False                  True      False   \n",
       "1     False           False          False                 False      False   \n",
       "2     False           False          False                 False      False   \n",
       "3     False           False          False                 False      False   \n",
       "4     False           False          False                 False      False   \n",
       "...     ...             ...            ...                   ...        ...   \n",
       "7495  False           False          False                 False      False   \n",
       "7496  False           False          False                 False      False   \n",
       "7497  False           False          False                 False      False   \n",
       "7498  False           False           True                  True      False   \n",
       "7499  False           False           True                 False      False   \n",
       "\n",
       "      Number of Open Accounts  Years of Credit History  Maximum Open Credit  \\\n",
       "0                       False                    False                False   \n",
       "1                       False                    False                False   \n",
       "2                       False                    False                False   \n",
       "3                       False                    False                False   \n",
       "4                       False                    False                False   \n",
       "...                       ...                      ...                  ...   \n",
       "7495                    False                    False                False   \n",
       "7496                    False                    False                False   \n",
       "7497                    False                    False                False   \n",
       "7498                    False                    False                False   \n",
       "7499                    False                    False                False   \n",
       "\n",
       "      Number of Credit Problems  Months since last delinquent  Bankruptcies  \\\n",
       "0                         False                          True         False   \n",
       "1                         False                          True         False   \n",
       "2                         False                          True         False   \n",
       "3                         False                          True         False   \n",
       "4                         False                          True         False   \n",
       "...                         ...                           ...           ...   \n",
       "7495                      False                          True         False   \n",
       "7496                      False                         False         False   \n",
       "7497                      False                          True         False   \n",
       "7498                      False                          True         False   \n",
       "7499                      False                          True         False   \n",
       "\n",
       "      Purpose   Term  Current Loan Amount  Current Credit Balance  \\\n",
       "0       False  False                False                   False   \n",
       "1       False  False                False                   False   \n",
       "2       False  False                False                   False   \n",
       "3       False  False                False                   False   \n",
       "4       False  False                False                   False   \n",
       "...       ...    ...                  ...                     ...   \n",
       "7495    False  False                False                   False   \n",
       "7496    False  False                False                   False   \n",
       "7497    False  False                False                   False   \n",
       "7498    False  False                False                   False   \n",
       "7499    False  False                False                   False   \n",
       "\n",
       "      Monthly Debt  Credit Score  Credit Default  \n",
       "0            False         False           False  \n",
       "1            False         False           False  \n",
       "2            False         False           False  \n",
       "3            False         False           False  \n",
       "4            False         False           False  \n",
       "...            ...           ...             ...  \n",
       "7495         False         False           False  \n",
       "7496         False         False           False  \n",
       "7497         False         False           False  \n",
       "7498         False          True           False  \n",
       "7499         False          True           False  \n",
       "\n",
       "[7500 rows x 18 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull()            # 布尔矩阵显示缺失值，这个方法返回一个布尔矩阵，其中True表示对应位置的值是缺失值，False表示对应位置的值不是缺失值。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Id                                 0\n",
       "Home Ownership                     0\n",
       "Annual Income                   1557\n",
       "Years in current job             371\n",
       "Tax Liens                          0\n",
       "Number of Open Accounts            0\n",
       "Years of Credit History            0\n",
       "Maximum Open Credit                0\n",
       "Number of Credit Problems          0\n",
       "Months since last delinquent    4081\n",
       "Bankruptcies                      14\n",
       "Purpose                            0\n",
       "Term                               0\n",
       "Current Loan Amount                0\n",
       "Current Credit Balance             0\n",
       "Monthly Debt                       0\n",
       "Credit Score                    1557\n",
       "Credit Default                     0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().sum()      # 每列缺失值计数,sum方法为求每一列的和"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 缺失值的填补\n",
    "尝试用众数 中位数进行填补"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        482087.0\n",
       "1       1025487.0\n",
       "2        751412.0\n",
       "3        805068.0\n",
       "4        776264.0\n",
       "          ...    \n",
       "7495     402192.0\n",
       "7496    1533984.0\n",
       "7497    1878910.0\n",
       "7498          NaN\n",
       "7499          NaN\n",
       "Name: Annual Income, Length: 7500, dtype: float64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['Annual Income']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data['Annual Income'])\n",
    "# dataframe里单独的一列是series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1168386.0"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算 'Annual Income' 列的中位数（会自动忽略 NaN 值）\n",
    "median_income = data['Annual Income'].median()\n",
    "median_income"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用计算出的中位数填补该列的 NaN 值\n",
    "# inplace=True 参数表示直接在原 DataFrame 上进行修改\n",
    "# 如果不设置该参数，fillna() 方法会返回一个新的 DataFrame，原 DataFrame 不会被修改\n",
    "data['Annual Income'].fillna(median_income, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 检查下是否有缺失值\n",
    "data['Annual Income'].isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     969475.0\n",
       "1    1043651.0\n",
       "2    1058376.0\n",
       "3    1161660.0\n",
       "4    1338113.0\n",
       "Name: Annual Income, dtype: float64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用众数填充缺失值\n",
    "import pandas as pd\n",
    "data = pd.read_csv('data.csv') #需要重新读取一遍数据\n",
    "mode = data['Annual Income'].mode()\n",
    "# mode() 会返回数据中出现频率最高的所有值，如果频次相同，会返回最多每个值。\n",
    "mode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 这里返回了4个最多频次的值，我们一般保留第一个\n",
    "mode = mode[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 众数填补\n",
    "data['Annual Income'].fillna(mode, inplace=True)\n",
    "# 检查下是否有缺失值\n",
    "data['Annual Income'].isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 填补所有的数值型缺失值\n",
    "从上面的打印信息可以看到，缺失值的数值列包括：Annual Income、Months since last delinquent、Bankruptcies、Credit Score。这些列的缺失值数量分别为1557、4081、14和1557。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Id', 'Home Ownership', 'Annual Income', 'Years in current job',\n",
       "       'Tax Liens', 'Number of Open Accounts', 'Years of Credit History',\n",
       "       'Maximum Open Credit', 'Number of Credit Problems',\n",
       "       'Months since last delinquent', 'Bankruptcies', 'Purpose', 'Term',\n",
       "       'Current Loan Amount', 'Current Credit Balance', 'Monthly Debt',\n",
       "       'Credit Score', 'Credit Default'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.indexes.base.Index"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data.columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "很自然的 我们想要遍历打印这个对象中的每一个元素，如果是数值，就补全缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2, 3]"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 介绍一下tolist方法，将numpy数组和pandas对象转换成list\n",
    "import numpy as np\n",
    "a =np.array([1,2,3])\n",
    "a.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = data.columns.tolist()\n",
    "type(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Id                                0\n",
       "Home Ownership                    0\n",
       "Annual Income                     0\n",
       "Years in current job            371\n",
       "Tax Liens                         0\n",
       "Number of Open Accounts           0\n",
       "Years of Credit History           0\n",
       "Maximum Open Credit               0\n",
       "Number of Credit Problems         0\n",
       "Months since last delinquent      0\n",
       "Bankruptcies                      0\n",
       "Purpose                           0\n",
       "Term                              0\n",
       "Current Loan Amount               0\n",
       "Current Credit Balance            0\n",
       "Monthly Debt                      0\n",
       "Credit Score                      0\n",
       "Credit Default                    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 循环遍历c这个列表中的每一列\n",
    "for i in c:\n",
    "    # 找到为数值型的列\n",
    "    if data[i].dtype != 'object': # 找到为数值型的列\n",
    "        if data[i].isnull().sum() > 0: # 找到存在缺失值的列\n",
    "            #计算该列的均值\n",
    "            mean_value = data[i].mean()\n",
    "            #用均值填充缺失值\n",
    "            data[i].fillna(mean_value, inplace=True)\n",
    "\n",
    "data.isnull().sum()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pylearn",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
